'''
Exercise done in PyCharm for logic and in a Jupyter notebook for IPython viewing.

ISSUE: sanitized is probably broken
'''
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

# Configure the matplotlib backend as plotting inline in IPython:
'''%matplotlib inline'''
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
pickle_file_sanitized = 'notMNIST_sanitized.pickle'

with open(pickle_file_sanitized, 'rb') as f:
    save_san = pickle.load(f)
    train_dataset_sanitized = save_san['train_dataset']
    train_labels_sanitized = save_san['train_labels']
    valid_dataset_sanitized = save_san['valid_dataset']
    valid_labels_sanitized = save_san['valid_labels']
    test_dataset_sanitized = save_san['test_dataset']
    test_labels_sanitized = save_san['test_labels']
image_size = 28
num_labels = 10

def reformat(dataset, labels):
    # Flatten each 28x28 image into a 784-element row vector
    # (3D -> 2D with an index dimension).
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...] (one-hot encoding).
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
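
# Quick sanity check of the one-hot trick above, with toy values chosen for
# illustration: broadcasting labels[:, None] against np.arange(num_labels)
# yields a boolean matrix with exactly one True per row.
_demo = (np.arange(3) == np.array([0, 2, 1])[:, None]).astype(np.float32)
assert (_demo == np.array([[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]])).all()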
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('---\nTraining set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

train_dataset_sanitized, train_labels_sanitized = reformat(train_dataset_sanitized, train_labels_sanitized)
valid_dataset_sanitized, valid_labels_sanitized = reformat(valid_dataset_sanitized, valid_labels_sanitized)
test_dataset_sanitized, test_labels_sanitized = reformat(test_dataset_sanitized, test_labels_sanitized)
print('---\nTraining set (sanitized)', train_dataset_sanitized.shape, train_labels_sanitized.shape)
print('Validation set (sanitized)', valid_dataset_sanitized.shape, valid_labels_sanitized.shape)
print('Test set (sanitized)', test_dataset_sanitized.shape, test_labels_sanitized.shape)
# With gradient descent training, even this much data is prohibitive.
# Subset the training data for faster turnaround.
train_subset = 10000  # gets ~10.5%, as good as a random pick

learning_rate = 0.01

# A Graph is a set of tf.Operation objects representing units of computation
# and tf.Tensor objects representing the data that flows between operations.
# Important note: this class is not thread-safe for graph construction. All
# operations should be created from a single thread, or external
# synchronization must be provided.
graph = tf.Graph()
with graph.as_default():
    # Input data.
    # Load the training, validation and test data into constants that are
    # attached to the graph. A constant is created from a value, with an
    # optional dtype and shape.
    tf_train_dataset = tf.constant(train_dataset[:train_subset, :])
    '''
    # Constant 1-D tensor populated with a value list.
    tensor = tf.constant([1, 2, 3, 4, 5, 6, 7])  # => [1 2 3 4 5 6 7]

    # Constant 2-D tensor populated with the scalar value -1.
    tensor = tf.constant(-1.0, shape=[2, 3])  # => [[-1. -1. -1.]
                                              #     [-1. -1. -1.]]
    '''
    tf_train_labels = tf.constant(train_labels[:train_subset])
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    '''The above does not change during training.'''
    # Variables.
    # These are the parameters that we are going to be training. The weight
    # matrix will be initialized using random values following a (truncated)
    # normal distribution. The biases get initialized to zero.
    # A Variable maintains state across calls to run(). It takes an initial
    # value, which can be a tensor of any type; after construction, its type
    # and shape are fixed.
    weights = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_labels]))
    # truncated_normal draws random values from a normal distribution,
    # redrawing any value more than two standard deviations from the mean.
    biases = tf.Variable(tf.zeros([num_labels]))  # variables are attached to the graph
    '''These will be adjusted during training.'''
    # Training computation.
    # We multiply the inputs with the weight matrix, and add biases. We compute
    # the softmax and cross-entropy (it's one operation in TensorFlow, because
    # it's very common, and it can be optimized). We take the average of this
    # cross-entropy across all training examples: that's our loss.
    logits = tf.matmul(tf_train_dataset, weights) + biases  # matrix-multiply the training data by the weights and add the biases
    '''
    The logit function is the inverse of the sigmoidal "logistic" function:
    for a probability p, the logit gives the log-odds, i.e. log(p / (1 - p)).
    https://stackoverflow.com/questions/41455101/what-is-the-meaning-of-the-word-logits-in-tensorflow
    Logit is a function that maps probabilities [0, 1] to [-inf, +inf].
    Softmax is a function that maps [-inf, +inf] to [0, 1], similar to the
    sigmoid, but softmax also normalizes the sum of the values (the output
    vector) to be 1.
    TensorFlow "with logits": it means you are applying a softmax function to
    logit numbers to normalize them. The input vector (the logits) is not
    normalized and can range over [-inf, +inf].
    '''
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))  # deprecated variant
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=tf_train_labels))  # cross-entropy between logits and labels; expects unscaled logits, since it applies softmax internally for efficiency
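    # A minimal numpy sketch (illustrative values, not part of the graph) of
    # what the fused op computes for one example: softmax over the raw logits,
    # then cross-entropy against the one-hot label.
    #   example_logits = np.array([2.0, 1.0, 0.1])
    #   probs = np.exp(example_logits) / np.sum(np.exp(example_logits))  # softmax, sums to 1
    #   one_hot = np.array([1.0, 0.0, 0.0])
    #   xent = -np.sum(one_hot * np.log(probs))  # ~0.417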
    # Optimizer.
    # We are going to find the minimum of this loss using gradient descent.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)  # gradient descent with the given learning rate; minimize() takes the tensor whose value should be minimized
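    # For reference, minimize(loss) is roughly shorthand for the two-step form
    # of the tf.train.Optimizer API (sketch only, not used here):
    #   grads_and_vars = optimizer_instance.compute_gradients(loss)
    #   update_op = optimizer_instance.apply_gradients(grads_and_vars)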
    # Predictions for the training, validation, and test data.
    # These are not part of training, but merely here so that we can report
    # accuracy figures as we train.
    train_prediction = tf.nn.softmax(logits)  # computes softmax activations, returning a tensor of the same shape as logits
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 801

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])
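
# Quick sanity check of accuracy() on toy values chosen for illustration:
# the first prediction's argmax matches its label, the second does not,
# so the score should be 50%.
assert accuracy(np.array([[0.9, 0.1], [0.8, 0.2]]),
                np.array([[1.0, 0.0], [0.0, 1.0]])) == 50.0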
with tf.Session(graph=graph) as session:
    # This is a one-time operation which ensures the parameters get initialized as
    # we described in the graph: random weights for the matrix, zeros for the
    # biases.
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in range(num_steps):
        # Run the computations. We tell .run() that we want to run the optimizer,
        # and get the loss value and the training predictions returned as numpy
        # arrays.
        _, l, predictions = session.run([optimizer, loss, train_prediction])
        if step % 100 == 0:
            print('Loss at step %d: %f' % (step, l))
            print('Training accuracy: %.1f%%' % accuracy(
                predictions, train_labels[:train_subset, :]))
            # Calling .eval() on valid_prediction is basically like calling run(), but
            # just to get that one numpy array. Note that it recomputes all its graph
            # dependencies.
            print('Validation accuracy: %.1f%%' % accuracy(
                valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
'''SGD'''
print("-----------\nSGD UNSANITIZED")

# learning_rate = 0.01
batch_size = 128  # 63-66.3% test accuracy at learning_rate 0.01
# batch_size = 10000  # decent test accuracy of 74.2%
graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    # Placeholders are always fed; they take the dtype of the elements to be
    # fed, an optional tensor shape, and an optional name for the operation.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
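    # A placeholder holds no data itself: every session.run() that touches one
    # must supply a value through feed_dict (as in the training loop below),
    # otherwise TensorFlow raises an error.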
    # Variables.
    weights = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    logits = tf.matmul(tf_train_dataset, weights) + biases
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=tf_train_labels))  # cross-entropy between logits and labels; expects unscaled logits, since it applies softmax internally for efficiency

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 3001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
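        # e.g. with 200,000 training rows and batch_size 128, offsets cycle
        # through 0, 128, 256, ... and wrap around before the last partial
        # batch (the 200,000 figure is illustrative, not a guarantee).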
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
print("------\nSGD SANITIZED")

# batch_size = 128  # 59-63% test accuracy
# batch_size = 10000  # decent test accuracy of 71%
graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset_sanitized)
    tf_test_dataset = tf.constant(test_dataset_sanitized)

    weights = tf.Variable(tf.truncated_normal([image_size * image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))

    logits = tf.matmul(tf_train_dataset, weights) + biases
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=tf_train_labels))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(
        tf.matmul(tf_valid_dataset, weights) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)
num_steps = 3001
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()  # returns an Op that initializes global variables
    print("Initialized")
    for step in range(num_steps):
        # Offsets must wrap by the sanitized training-set size (it is smaller
        # than the full set); using train_labels.shape[0] here would over-run
        # the sanitized arrays.
        offset = (step * batch_size) % (train_labels_sanitized.shape[0] - batch_size)
        batch_data = train_dataset_sanitized[offset:(offset + batch_size), :]
        batch_labels = train_labels_sanitized[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels_sanitized))
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels_sanitized))
'''
Problem:
Turn the logistic regression example with SGD into a 1-hidden layer neural
network with rectified linear units nn.relu() and 1024 hidden nodes. This
model should improve your validation / test accuracy.
'''
print("-----\n1-HIDDEN-LAYER NN")
'''http://x-wei.github.io/dlMOOC_L2.html'''
batch_size = 128
num_hidden = 1024

learning_rate = 0.025

graph = tf.Graph()
with graph.as_default():
    # Input data. For the training data, we use a placeholder that will be fed
    # at run time with a training minibatch.
    tf_train_dataset = tf.placeholder(tf.float32,
                                      shape=(batch_size, image_size * image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Variables for linear layer 1.
    W1 = tf.Variable(
        tf.truncated_normal([image_size * image_size, num_hidden]))
    b1 = tf.Variable(tf.zeros([num_hidden]))

    # Hidden ReLU input computation.
    y1 = tf.matmul(tf_train_dataset, W1) + b1
    # Hidden ReLU output computation.
    X1 = tf.nn.relu(y1)
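    # ReLU keeps positive inputs and zeroes out negatives, elementwise, e.g.
    # relu([-2.0, 0.0, 3.0]) -> [0.0, 0.0, 3.0]; this nonlinearity is what
    # makes the hidden layer more expressive than a purely linear model.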
    # Variables for linear layer 2.
    W2 = tf.Variable(
        tf.truncated_normal([num_hidden, num_labels]))
    b2 = tf.Variable(tf.zeros([num_labels]))
    # Logit (y2) output.
    logits = tf.matmul(X1, W2) + b2
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=tf_train_labels))

    def getlogits(X):
        y1 = tf.matmul(X, W1) + b1
        X1 = tf.nn.relu(y1)
        return tf.matmul(X1, W2) + b2
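    # getlogits() rebuilds the same two-layer computation on a different input
    # tensor while reusing the trained W1/b1/W2/b2 variables, so validation and
    # test predictions share the training weights.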
    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(getlogits(tf_valid_dataset))
    test_prediction = tf.nn.softmax(getlogits(tf_test_dataset))

# Run SGD optimization.
num_steps = 3001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in range(num_steps):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        # Generate a minibatch.
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                valid_prediction.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
    print("done")